/*============================================================================*/

* Do: Regressions Part 2 - ENAHO
* Last update: 09.05.2023

/*============================================================================*/

* ==============================================================================
* SECTION: FOLDERS
* ==============================================================================

*------------------------ SUBFOLDERS (DO NOT CHANGE)
* Input, output and analysis directories, each built from the root global.
* The root global is read here but is not defined anywhere in this file.
global in       "$root/Rawdata"
global out      "$root/Final"
global analysis "$root/Analysis"

* ==============================================================================
* SECTION: DATA PREPARATION FOR REGRESSIONS (ENAHO)
* ==============================================================================


* Close any log that is still open, then start this file's log.
capture log close
log using "datacreate_part2", replace

* --- District (ubigeo) administrative series ---
* Running totals of numvz and requestptp within each district, accumulated in
* year-month order, stored in the tempfile admin for the merge further down.
use "$in/district_long", clear
drop if year == 2021

bysort ubigeo (year month): generate totnumvz_ubigeo = sum(numvz)
bysort ubigeo (year month): generate totptp_ubigeo   = sum(requestptp)

label variable totnumvz_ubigeo "Registered VZs"
label variable totptp_ubigeo   "VZs w/ PTP"

tempfile admin
keep ubigeo year month totnumvz_ubigeo totptp_ubigeo
save `admin', replace

* --- Province administrative series ---
* Same running totals as the district series, accumulated within province_id
* and stored in the tempfile adminp.
use "$in/province_long", clear
drop if year == 2021

bysort province_id (year month): generate totnumvz_prov = sum(numvz)
bysort province_id (year month): generate totptp_prov   = sum(requestptp)

label variable totnumvz_prov "Registered VZs"
label variable totptp_prov   "VZs w/ PTP"

tempfile adminp
keep province_id year month totnumvz_prov totptp_prov
save `adminp', replace

* Placebo series: the province totals with the calendar year moved back by
* six, so each row merges onto survey records dated six years earlier.
tempfile adminpp
replace year = year - 6
drop totptp_prov
rename totnumvz_prov totnumvz_prov_placebom6
label variable totnumvz_prov_placebom6 "Registered VZs Six Years Later"
save `adminpp', replace

* --- ENAHO survey records plus the administrative series ---
use "$in/enaho", clear

* Province code = first four characters of the district code
generate province_id = substr(ubigeo, 1, 4)

* Province totals: survey rows without a match are set to zero
merge n:1 province_id year month using `adminp', keep(master match)
foreach v in totnumvz_prov totptp_prov {
	replace `v' = 0 if _merge == 1
}
drop _merge

* Placebo totals: unmatched rows are zero-filled only for years before 2015;
* unmatched rows from 2015 onwards keep a missing value
merge n:1 province_id year month using `adminpp', keep(master match)
replace totnumvz_prov_placebom6 = 0 if _merge == 1 & year < 2015
drop _merge

* District totals: survey rows without a match are set to zero
merge n:1 ubigeo year month using `admin', keep(master match)
foreach v in totnumvz_ubigeo totptp_ubigeo {
	replace `v' = 0 if _merge == 1
}
drop _merge

* Outcomes: trust in institutions.
* The labels below are assigned to trust1, trust2, ... trust16 in list order.
local k = 0
foreach txt in "JNE" "ONPE" "RENIEC" "Province municipality"        ///
               "District municipality" "Police" "Army"               ///
               "Regional government" "Judicial Power"                ///
               "Ministry of Education" "Ombudsman Office"            ///
               "Congress" "Political Parties" "Newspapers"           ///
               "Radio/TV" "Catholic Church" {
	local ++k
	label variable trust`k' "`txt'"
}

* Outcomes: perception of main country problems.
* countryprobN and countryprobrankN carry the same label for N = 1, ..., 14,
* assigned in list order.
local k = 0
foreach txt in "Corruption"                                          ///
               "Government credibility and transparency"             ///
               "Lack of employment"                                  ///
               "Lack of citizen security"                            ///
               "Domestic violence"                                   ///
               "Lack of coverage and poor public health care"        ///
               "Lack of coverage in social security system"          ///
               "Poor public education"                               ///
               "Human rights violation"                              ///
               "Low wages and higher prices"                         ///
               "Poverty"                                             ///
               "Homelessness"                                        ///
               "Lack of support to agriculture"                      ///
               "Malfunction of democracy" {
	local ++k
	label variable countryprob`k'     "`txt'"
	label variable countryprobrank`k' "`txt'"
}

* Keep working age individuals and drop those flagged as foreign immigrants
keep if workage == 1
drop if frgnimmigrant==1

* Attach province-level information to the individual records.
* keep(master match) mirrors the administrative merges earlier in this file:
* without it, any province that appears only in the using file would be
* appended as an extra row with no survey information, and that row would be
* saved in the final dataset and counted in the later collapse-based weights.

merge n:1 province_id using "$in/population_prov17", keep(master match)
drop _merge

merge n:1 province_id using "$in/vzborn_prov07", keep(master match)
* Survey provinces absent from the 2007 file get a count of zero
replace vzborn_prov07=0 if _merge==1
drop _merge

* Identifiers are strings up to this point (province_id is built with substr);
* convert them to numeric for the arithmetic and grouping that follow
destring region_id ubigeo province_id, replace

* Informal / formal employment divided by the working-age total (2007 variables).
* NOTE(review): "workgage_ubigeo07" may be a misspelling of a working-age
* variable; the name is kept exactly as written - confirm against the data.
g informalemployrate07 = (informalemploy_ubigeo07 / workgage_ubigeo07)
g formalemployrate07 = (formalemploy_ubigeo07 / workgage_ubigeo07)

* Outcomes

* z-scores: (value - mean) / sd, with mean and sd taken over every
* observation currently in memory; each result is stored as <name>_std
local zvars trust* share_combined informalemployrate07 formalemployrate07 ///
	totnumvz_ubigeo totptp_ubigeo totnumvz_prov totptp_prov vzborn_ubigeo07
quietly foreach x of varlist `zvars' {
	summarize `x'
	generate `x'_std = (`x' - r(mean)) / r(sd)
}

* Log shares use (count + 1) / (population + 1), so a zero count still has a
* defined logarithm
generate logsharevz   = ln((totnumvz_prov + 1) / (population_prov17 + 1))
generate logsharevz07 = ln((vzborn_prov07 + 1) / (population_prov17 + 1))

* Raw shares of the 2017 province population
generate sharevz           = totnumvz_prov / population_prov17
generate sharevz07         = vzborn_prov07 / population_prov17
generate sharevz_placebom6 = totnumvz_prov_placebom6 / population_prov17

generate logsharevz_placebom6 = ln((totnumvz_prov_placebom6 + 1) / (population_prov17 + 1))

* Trust indices: row means of the standardized component items
egen trust_media     = rmean(trust14_std trust15_std)
egen trust_police    = rmean(trust6_std trust7_std)
egen trust_localgovt = rmean(trust4_std trust5_std trust8_std)

* Country-problem indicators that are used under their own names
rename countryprob1 problem_corrupt
rename countryprob3 problem_emp
rename countryprob4 problem_crime

* Crime totals: row sums over the listed crime1_* items
egen crime_violent = rsum(crime1_6 crime1_9 crime1_14 crime1_17)
egen crime_nonviolent = rsum(crime1_1 crime1_2 crime1_4 crime1_5 crime1_7     ///
                             crime1_10 crime1_12 crime1_13 crime1_15 crime1_16)

rename crime1_3 crime_discrimination
rename crime1_8 crime_antiimmigrant

* Zeros are turned into missing for selected years: before 2011 and in 2020
* for the violent total; up to and including 2011 and in 2020 for the rest.
* NOTE(review): presumably years in which these items were not collected -
* confirm against the questionnaire.
recode crime_violent (0 = .) if year < 2011 | year == 2020
recode crime_nonviolent crime_discrimination crime_antiimmigrant (0 = .) if year <= 2011 | year == 2020

foreach c in violent nonviolent {
	generate log_crime_`c' = ln(crime_`c' + 1)
}

* Single numeric id for each region-year pair
generate region_year = 10000 * region_id + year


* Education groups
* Four-category recode of education. Observations with missing education stay
* missing: Stata treats missing as larger than any number, so an unguarded
* "education > 8" would classify them as University.
g education_group = .
	replace education_group = 1 if education < 6 // less than secondary
	replace education_group = 2 if education == 6 // complete secondary
	replace education_group = 3 if education > 6 & education <= 8 // technical (complete+incomplete)
	replace education_group = 4 if education > 8 & !missing(education) // university (undergraduate+graduate)
	la define education_group 1 "Less than secondary" 2 "Complete secondary" 3 "Technical" 4 "University", replace
	la values education_group education_group
	la var education_group "Education"

* Wage
* For each wage variable: a version divided by 1000 and a natural log
foreach inc in wage1 wage2 {
	generate `inc'_1000 = `inc' / 1000
	generate ln`inc'    = ln(`inc')
}

label variable wage2_1000 "Labor income (1000s)"
label variable lnwage2    "Log Net Labor income"

* Natural logs of total household expenditure and income
generate lnhhexp = ln(hhexpend_tot)
generate lnhhinc = ln(hhincome1_tot)

* Observations with employment_status equal to 0 are coded as not formally employed
replace formalemploy = 0 if employment_status == 0

* Marital status
* Collapse marital codes 1-2, 3-5 and 6 into three categories

recode marital 1/2=1 3/5=2 6=3, gen(marital_status)

	* ", replace" matches the education_group label definition in this file
	* and keeps the line from failing if a label named ms already exists
	la define ms 1 "Married/Cohabitation" 2"Divorced/Widowed/Separated" 3"Single", replace
	la values marital_status ms

* Trends (linear)
* Years elapsed since January 2006, in fractional years

gen time = year-2006 + ((month-1)/12)

* Indicator for gender == 0. Observations with missing gender stay missing
* instead of being coded 0. The later collapse uses (count) female as the
* cell size, so such observations no longer contribute to those counts.
gen female = gender==0 if !missing(gender)

* Divide by 24*60*60 = 86400.
* NOTE(review): presumably converts seconds to days - confirm the input unit.
replace duration_Border = duration_Border/24/60/60


*------------------------ LABELS
* Covariates
label variable employment_status "Employed"


* Write the individual-level regression dataset, then reload it from disk
save "$out/data_regressions_enaho", replace


use "$out/data_regressions_enaho", clear

* District scatter of vzborn_ubigeo17 against vzborn_ubigeo07.
* The data are collapsed to one row per district (ubigeo); the count of
* non-missing female values in each district serves as the frequency weight.
* preserve / restore brings the individual-level data back afterwards.
preserve

collapse (mean) vzborn_ubigeo07 vzborn_ubigeo17 (count) female, by(ubigeo)

label variable vzborn_ubigeo07 "Venezuelan-Born 2007 by District"

twoway (scatter vzborn_ubigeo17 vzborn_ubigeo07 [fweight = female],       ///
            msymbol(Oh) ylabel(0(1000)4000) xlabel(0(50)250)                ///
            ytitle("Venezuelan-Born 2017 by District"))                     ///
       (lfit vzborn_ubigeo17 vzborn_ubigeo07 [fweight = female],          ///
            range(. 230)),                                                  ///
       legend(off) saving("$analysis/vzborn17", replace)

restore

* First-stage figures: province-by-year means for 2015 onwards, with the
* count of non-missing female values per cell used as the frequency weight
drop if year < 2015

collapse (mean) logsharevz logsharevz07 sharevz sharevz07 (count) female, by(province_id year)

* Floor both log shares at -12; cells whose mean raw share is exactly zero
* are also placed at -12
replace logsharevz   = -12 if logsharevz   < -12 | sharevz   == 0
replace logsharevz07 = -12 if logsharevz07 < -12 | sharevz07 == 0

label variable logsharevz07 "Percent Venezuelan-Born 2007"
label variable logsharevz   "Percent Registered VZs"

* Axis tick text: log-share values displayed as approximate percentages
label define ls -12 "0%" -11 "0.002%" -10 "0.005%" -9 "0.01%" -8 "0.03%" -6 "0.25%" -3 "5%"

label values logsharevz07 ls
label values logsharevz   ls

* One scatter plus linear fit per year, each saved as its own .gph file
forvalues yr = 2015/2020 {
	twoway (scatter logsharevz logsharevz07 if year == `yr' [fweight = female], ///
	            msymbol(Oh) ylabel(-12(3)-3, valuelabel)                          ///
	            xlabel(-12(1)-8, valuelabel)                                      ///
	            ytitle("Percent Registered VZs `yr'"))                            ///
	       (lfit logsharevz logsharevz07 if year == `yr' [fweight = female]),    ///
	       legend(off) saving("$analysis/totnumvz_`yr'", replace)
}

* Combine the six yearly panels into one figure and export it as a PDF
graph combine "$analysis/totnumvz_2015.gph"                                ///
              "$analysis/totnumvz_2016.gph"                                ///
              "$analysis/totnumvz_2017.gph"                                ///
              "$analysis/totnumvz_2018.gph"                                ///
              "$analysis/totnumvz_2019.gph"                                ///
              "$analysis/totnumvz_2020.gph",                               ///
              saving("$analysis/1ststage_part2_totnumvz", replace) imargin(medsmall)

graph export "$analysis/1ststage_part2_totnumvzfinal.pdf", as(pdf) replace


log close
